{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Decision tree baseline on `pima_diabetes_dataset.csv`\n",
    "\n",
    "This notebook loads the CSV, holds out 40% of the rows as a test set, fits a `DecisionTreeClassifier` with default hyperparameters, and reports test-set accuracy for the `Outcome` column.\n",
    "\n",
    "Both the train/test split and the tree are seeded with `SEED`, so *Restart Kernel and Run All* reproduces the same numbers. Cells whose code was changed have had their stored outputs cleared; re-run the notebook to regenerate them."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this one cell so the notebook's dependencies are visible up front.\n",
    "import pandas as pd\n",
    "from sklearn import metrics\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.tree import DecisionTreeClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune.\n",
    "DATA_PATH = 'pima_diabetes_dataset.csv'  # resolved relative to the kernel's working directory\n",
    "TARGET = 'Outcome'  # label column; every other column is used as a feature\n",
    "TEST_SIZE = 0.40  # fraction of rows held out for evaluation\n",
    "SEED = 42  # passed as random_state to both the split and the classifier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "The schema printed below shows 768 rows and 9 columns, with no null entries in any column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(DATA_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "RangeIndex: 768 entries, 0 to 767\n",
      "Data columns (total 9 columns):\n",
      " # Column Non-Null Count Dtype \n",
      "--- ------ -------------- ----- \n",
      " 0 Pregnancies 768 non-null int64 \n",
      " 1 Glucose 768 non-null int64 \n",
      " 2 BloodPressure 768 non-null int64 \n",
      " 3 SkinThickness 768 non-null int64 \n",
      " 4 Insulin 768 non-null int64 \n",
      " 5 BMI 768 non-null float64\n",
      " 6 DiabetesPedigreeFunction 768 non-null float64\n",
      " 7 Age 768 non-null int64 \n",
      " 8 Outcome 768 non-null int64 \n",
      "dtypes: float64(2), int64(7)\n",
      "memory usage: 54.1 KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',\n",
       " 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],\n",
       " dtype='object')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Features and target\n",
    "\n",
    "`Outcome` is the label; the remaining eight columns are the features."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# drop() returns a new frame, so `data` itself is left untouched and this cell is safe to re-run.\n",
    "X = data.drop(columns=[TARGET])\n",
    "y = data[TARGET]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train/test split\n",
    "\n",
    "`random_state=SEED` pins the shuffle, so the same rows land in the test set on every run."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, Y_train, Y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED\n",
    ")\n",
    "\n",
    "# Sanity checks: the two parts must account for every row, and features/labels must stay aligned.\n",
    "assert len(X_train) + len(X_test) == len(X)\n",
    "assert len(X_train) == len(Y_train) and len(X_test) == len(Y_test)\n",
    "\n",
    "print(f\"Shape of X_test is {X_test.shape}\")\n",
    "print(f\"Shape of X_train is {X_train.shape}\")\n",
    "print(f\"Shape of Y_test is {Y_test.shape}\")\n",
    "print(f\"Shape of Y_train is {Y_train.shape}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit and predict\n",
    "\n",
    "The tree uses default hyperparameters. It is seeded as well, so that repeated fits on the same training rows give the same tree."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "clf = DecisionTreeClassifier(random_state=SEED)\n",
    "clf.fit(X_train, Y_train)\n",
    "Y_pred = clf.predict(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate\n",
    "\n",
    "Accuracy is measured on the held-out test rows only."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# accuracy_score returns a fraction; multiply by 100 to report a percentage.\n",
    "print(\"Accuracy\", metrics.accuracy_score(Y_test, Y_pred)*100)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}